EmbeddingLookup

传入一个矩阵和一组索引,按每个索引提取矩阵中对应的行(向量):若该行尚未被标记为“已正则化”,则先对该行进行正则化处理,并将其标记为已正则化;若该行已经正则化过,则直接使用。最后将各行按索引顺序拼接后输出。

\[\begin{split}\forall k \in [1, \text{ids\_size\_}], \quad i_k = \text{ids}_k: \quad \begin{cases} \text{if } \textbf{is\_regulated}[i_k] = 0: & \begin{cases} \displaystyle X_{i_k} \leftarrow X_{i_k} \cdot \frac{\text{max\_norm}} {\sum_{j=1}^{\text{layer\_size\_}} X_{i_k, j}} \\[10pt] \textbf{is\_regulated}[i_k] \leftarrow 1 \end{cases} \\[12pt] \text{输出向量 } Y_k \leftarrow X_{i_k} & \text{(始终执行)} \end{cases}\end{split}\]
输入:
  • input_data - 输入矩阵数据地址。

  • params - 参数数组(unsigned long long 类型),按以下顺序依次存放各参数的地址:
    • ids - 输入索引的存储地址。

    • max_norm - 最大范数约束。

    • is_regulated - 记录矩阵各行是否已被正则化的标志数组(0 表示尚未正则化,正则化后置为 1)。

    • ids_size_ - 输入索引个数。

    • layer_size_ - 输入矩阵的列数。

    • layer_num_ - 输入矩阵的行数。

  • core_mask(int, 可选) - 核掩码(仅适用于共享存储版本)。

输出:
  • output - 结果输出地址。

支持平台:

FT78NE MT7004

备注

  • FT78NE 支持 fp32(fp_ 前缀接口)

  • MT7004 支持 fp16(hp_ 前缀接口)和 fp32(fp_ 前缀接口)

共享存储版本:

void hp_embedding_lookup_s(float16 *input, float16 *output, unsigned long long *params, int core_mask);
void fp_embedding_lookup_s(float *input, float *output, unsigned long long *params, int core_mask);

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <embeddinglookup.h>
 4
 5int main(int argc, char* argv[]) {
 6    float*       input_data = (float*)0x200000000;
 7    float*       input_data_s = (float*)0x210000000;
 8    int*         ids = (int *)0x220000000;
 9    float*   output_data = (float*)0x230000000;
10    float*   output_data_s = (float *)0x240000000;
11    bool*     is_regulated_ = (bool *)0x250000000;
12    int32_t*  ulp_err=(int32_t*)   0x410000000;
13    int         *layer_size = (int *)0x260000000;
14    int *layer_num = (int *)0x261000000;
15    float *max_norm = (float *)0x262000000;
16    int *ids_size = (int *)0x263000000;
17    unsigned long long params[8], tmp;
18    params[0] = (unsigned long long)ids;
19    params[1] = (unsigned long long)max_norm;
20    params[2] = (unsigned long long)is_regulated_;
21    params[3] = (unsigned long long)ids_size;
22    params[4] = (unsigned long long)layer_size;
23    params[5] = (unsigned long long)layer_num;
24    *max_norm = 8.5;
25        *ids_size = 3000;
26        *layer_size = 7;
27        *layer_num = n; // 1000~20000
28        int test_step = 17;
29    //other initials
30    fp_embedding_lookup_s(input_data,  output_data,params, mask);
31    return 0;
32}

私有存储版本:

void hp_embedding_lookup_p(float16 *input, float16 *output, unsigned long long *params);
void fp_embedding_lookup_p(float *input, float *output, unsigned long long *params);

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <embeddinglookup.h>
 4
 5int main(int argc, char* argv[]) {
 6    float*       input_data = (float*)0x10000000;
 7    float*       input_data_s = (float*)0x10010000;
 8    int*         ids = (int *)0x10020000;
 9    float*   output_data = (float*)0x10030000;
10    float*   output_data_s = (float *)0x10040000;
11    bool*     is_regulated_ = (bool *)0x10050000;
12    int32_t*  ulp_err=(int32_t*)   0x110000000;
13    int         *layer_size = (int *)0x10060000;
14    int *layer_num = (int *)0x10061000;
15    float *max_norm = (float *)0x10062000;
16    int *ids_size = (int *)0x10063000;
17    unsigned long long params[8], tmp;
18    params[0] = (unsigned long long)ids;
19    params[1] = (unsigned long long)max_norm;
20    params[2] = (unsigned long long)is_regulated_;
21    params[3] = (unsigned long long)ids_size;
22    params[4] = (unsigned long long)layer_size;
23    params[5] = (unsigned long long)layer_num;
24    fp_embedding_lookup_p(input_data,output_data, params);
25    return 0;
26}